% Main Script: run_mc_empirical.m
%   Description:
%       This script produces the results of a numerical experiment calibrated to imitate an actual
%       empirical setting, where the experiment is based on the seminal studies of the effects of
%       unilateral divorce law reforms on the US divorce rates by Friedberg (1998) and Wolfers
%       (2006), subsequently revisited by Kim and Oka (2014) and Moon and Weidner (2015) in the
%       context of interactive fixed effects models
%   Output:
%       Table 4: Simulation results for the empirically calibrated experiment, N = 48, T = 33, R = 1

% ========================================
% Data from the studies of the effects of unilateral divorce law reforms on the US divorce rates
% ========================================
% ==============================
% Clean the data
% ==============================
% We first repeat the Table 3 in Kim and Oka (2014) by fixing some coding issues
clear all;
% Load the data, where the original data and code are from Kim and Oka (2014)
DATA  = csvread('DivorceRate1956_1988.csv', 1, 1);
% Construct state ID and drop IN (id == 16) and NM (id == 33)
id_st = DATA(:,1);
ind02  = (id_st ~= 16 & id_st ~= 33);
DATA02 = DATA(ind02,:);
id_st = DATA02(:,1);
year  = DATA02(:,2);
% Dependent variable
Y02 = (DATA02(:,4));
% State population
stpop = DATA02(:,5);
% Dummy variable for Friedberg's unilateral law
d_unilateral = DATA02(:,6);
% Dynamic treatment effects for Friedberg's unilateral law
dyn_unilateral = DATA02(:,7:14);
% sample size
NT = size(DATA02, 1);
T  = max(year) - min(year) + 1;
N  = NT / T;
% dimension of the regressors
K = 8;
% Convert dependend variable and regressors in NT-vector form:
YY=Y02;
XX=dyn_unilateral;
% ==============================
% Transform data into matrix form for later steps
% ==============================
Y = (reshape(YY,[T,N]))';
X = zeros(K,N,T);
for k=1:K
  X(k,:,:) = (reshape(XX(:,k),[T,N]))';
end
% Using only one regressor
D(1,:,:) = squeeze(sum(X));
X = D;
K = 1;

% ========================================
% Estimation using LS_factor
% ========================================
beta0 = zeros(K,1);
% Maximum number of the factors
Rmax = 9;
% Control for standard time effects
lambda_known=ones(N,1);
% Control for standard individual specific effects + time trend + time trend.^2
trend=(1:T)';
f_known=[ones(T,1),trend,trend.^2];
% Project out (generalized within transformation) all known factor loadings
if size(lambda_known,2)>0
    Y=Y - lambda_known * (lambda_known\Y);
    for k=1:K
        xx=squeeze(X(k,:,:));
        X(k,:,:)= xx - lambda_known * (lambda_known\xx);
    end
end
% Project out (generalized within transformation) all known factors
if size(f_known,2)>0
    Y=( Y' - f_known * (f_known\Y') )';
    for k=1:K
        xx=squeeze(X(k,:,:));
        X(k,:,:)= ( xx' - f_known * (f_known\xx') )';
    end
end
% Estimation
R = 0;
[beta,exitflag,lambda,f,Vbeta1,Vbeta2,Vbeta3,bcorr1,bcorr2,bcorr3]=LS_factor2(Y,X,R,lambda_known,f_known,'silent',10^-8,'m1',beta0,30,300,2,2);
beta_naiv=beta;
beta_bias_corrected=beta - bcorr2 - bcorr3;
std_est_homo=sqrt(diag(Vbeta1));
std_est_hetero=sqrt(diag(Vbeta2));

% ========================================
% Simulation with calibrated data
% ========================================
% ==============================
% Parameters setting
% ==============================
beta0 = beta_bias_corrected;
Gamma0 = lambda*f';
U = Y - squeeze(X)*beta0 - Gamma0;
sigma_U = sqrt(sum(sum(U.^2))/((N-1)*(T-3) - K ));
[V,S,W] = svds(squeeze(X),1);
GammaX = V*S*W';
Y_0 = squeeze(X)*beta0 + Gamma0;
% ==============================
% Simulation setting
% ==============================
kappa_grid = [0:0.20:2.0];
reps = 5000;
R = 1;
alpha = 0.05;
parpool('local', 8)
% Prepare arrays for the outputs
beta_LS = zeros(reps,length(kappa_grid));
se_LS = zeros(reps,length(kappa_grid));
LB_LS = zeros(reps,length(kappa_grid));
UB_LS = zeros(reps,length(kappa_grid));
beta_h = zeros(reps,length(kappa_grid));
se_h = zeros(reps,length(kappa_grid));
LB_h = zeros(reps,length(kappa_grid));
UB_h = zeros(reps,length(kappa_grid));
LB_h_s = zeros(reps,length(kappa_grid));
UB_h_s = zeros(reps,length(kappa_grid));
% ==============================
% Loop over all replications
% ==============================
parfor rep = 1:reps
    ticID_iter = tic;
    rng(12345+9876*rep)
    % Prepare arrays for the outputs
    beta_LS_tmp = zeros(1,length(kappa_grid));
    se_LS_tmp = zeros(1,length(kappa_grid));
    LB_LS_tmp = zeros(1,length(kappa_grid));
    UB_LS_tmp = zeros(1,length(kappa_grid));
    beta_h_tmp = zeros(1,length(kappa_grid));
    se_h_tmp = zeros(1,length(kappa_grid));
    LB_h_tmp = zeros(1,length(kappa_grid));
    UB_h_tmp = zeros(1,length(kappa_grid));
    LB_h_s_tmp = zeros(1,length(kappa_grid));
    UB_h_s_tmp = zeros(1,length(kappa_grid));
    % Loop over all kappa
    Y_0_tmp = Y_0 + sigma_U*randn([N,T]);
    for i_kappa=1:length(kappa_grid)
        kappa = kappa_grid(i_kappa);
        Y_tmp = Y_0_tmp + kappa*GammaX;       
        % Estimate with LS factor
        [beta,exitflag,lambda,f,Vbeta1,Vbeta2,Vbeta3,bcorr1,bcorr2,bcorr3]=LS_factor(Y_tmp,X,R,'silent',10^-8,'m1',beta0,30,300,2,2);
        beta_LS_tmp(i_kappa) = beta - bcorr2 - bcorr3;
        se_LS_tmp(i_kappa) = sqrt(diag(Vbeta2));
        LB_LS_tmp(i_kappa) = beta_LS_tmp(i_kappa) - se_LS_tmp(i_kappa)*norminv(1 - alpha/2);
        UB_LS_tmp(i_kappa) = beta_LS_tmp(i_kappa) + se_LS_tmp(i_kappa)*norminv(1 - alpha/2);
        % Estimate with honest weak factor
        [beta, bias, se, LB, UB, LB_s, UB_s, A] = honest_weak_factors(Y_tmp, X, R);
        beta_h_tmp(i_kappa) = beta;
        se_h_tmp(i_kappa) = se;
        LB_h_tmp(i_kappa) = LB;
        UB_h_tmp(i_kappa) = UB;
        LB_h_s_tmp(i_kappa) = LB_s;
        UB_h_s_tmp(i_kappa) = UB_s;                               
    end
    % ==============================
    % Prepare the results
    % ==============================
    beta_LS(rep,:) = beta_LS_tmp;
    se_LS(rep,:) = se_LS_tmp;
    LB_LS(rep,:) = LB_LS_tmp;
    UB_LS(rep,:) = UB_LS_tmp;
    beta_h(rep,:) = beta_h_tmp;
    se_h(rep,:) = se_h_tmp;
    LB_h(rep,:) = LB_h_tmp;
    UB_h(rep,:) = UB_h_tmp;
    LB_h_s(rep,:) = LB_h_s_tmp;
    UB_h_s(rep,:) = UB_h_s_tmp;    
    if  ~mod(rep,1000) || (rep == 1)
        fprintf('rep %3.0f/%1.0f, took %1.2fs\n', rep, reps, toc(ticID_iter));
    end    
end
% Close parallel computation
if isunix
    delete(gcp('nocreate'));
end

% ========================================
% Save the results
% ========================================
if not(isfolder('matlogs'))
    mkdir('matlogs')
end
outputfile = sprintf(['matlogs/mc_empirical_N%i_T%i_R0%i_reps%i'],N,T,R,reps);
disp(['# Save result to file:' outputfile])
save([outputfile '.mat']);

% ========================================
% Output Table 4 as CSV file
% ========================================
if ~isfolder('tables')
    mkdir('tables')
end
fileinfo = dir(['matlogs/*.mat']);
f_names = {fileinfo.name};
f_names = f_names(contains(f_names,'mc_empirical'));
load(['matlogs/' f_names{:}]);
% Summary statistics
stats_LS = gen_stats (beta_LS, beta0, se_LS);
stats_LS.size = stats_LS.rp(2,:);
stats_h = gen_stats (beta_h, beta0, se_h);
stats_h.length = mean(UB_h - LB_h);
stats_h.size = mean( (beta0 < LB_h) | (beta0 > UB_h) );
stats_h.length_s = mean(UB_h_s - LB_h_s);
stats_h.size_s = mean( (beta0 < LB_h_s) | (beta0 > UB_h_s) );
% Construct array with the results
tab_arr = [];
for i_kappa=1:length(kappa_grid)
    tab_arr = [tab_arr; ...
        kappa_grid(i_kappa), ...
        stats_LS.bias(i_kappa),stats_LS.std(i_kappa),stats_LS.rmse(i_kappa),stats_LS.rp(2,i_kappa)*100,stats_LS.length(2,i_kappa),stats_LS.oracle_length(2,i_kappa), ...
        stats_h.bias(i_kappa),stats_h.std(i_kappa),stats_h.rmse(i_kappa),stats_h.size(i_kappa)*100,stats_h.length(i_kappa),stats_h.oracle_length(2,i_kappa)];
end
% Convert array to table
columnLabels = {'kappa'};
basic_estimators = {'LS','H'};
for i_b=1:length(basic_estimators)
    columnLabels = [columnLabels, {[basic_estimators{i_b} ' bias']}, {[basic_estimators{i_b} ' std']}, {[basic_estimators{i_b} ' rmse']}, ...
        {[basic_estimators{i_b} ' size']}, {[basic_estimators{i_b} ' len']}, {[basic_estimators{i_b} ' LFCV']}];
end
tab4 = array2table(tab_arr, 'VariableNames', columnLabels);
% Output table
writetable(tab4, 'tables/table4.csv');
